<?php

namespace plugin\ai\app\admin\controller;

use plugin\ai\api\Embedding;
use plugin\ai\app\model\AiEmbedding;
use support\Redis;
use support\Request;
use support\Response;
use plugin\ai\app\model\AiDataset;
use plugin\admin\app\controller\Crud;
use support\exception\BusinessException;
use Throwable;
use Workerman\Timer;
use ZipArchive;

/**
 * Admin controller for AI datasets (training sets).
 *
 * Besides the usual CRUD pages it imports text files into a dataset, requests an
 * embedding for every stored text and mirrors the resulting vectors into a Redis
 * search index named "ai-embedding-{datasetId}".
 */
class AiDatasetController extends Crud
{

    /**
     * @var AiDataset
     */
    protected $model = null;

    /**
     * Constructor: binds the CRUD base class to the dataset model.
     * @return void
     */
    public function __construct()
    {
        $this->model = new AiDataset;
    }

    /**
     * Dataset list page.
     * @return Response
     */
    public function index(): Response
    {
        return view('ai-dataset/index');
    }

    /**
     * Insert a dataset (POST) or render the insert form (any other method).
     * @param Request $request
     * @return Response
     * @throws BusinessException when the illuminate/redis component is not installed
     */
    public function insert(Request $request): Response
    {
        if (!class_exists("Illuminate\\Redis\\RedisManager")) {
            throw new BusinessException('请安装illuminate/redis组件，composer require illuminate/redis');
        }
        if ($request->method() === 'POST') {
            return parent::insert($request);
        }
        return view('ai-dataset/insert');
    }

    /**
     * Query datasets; falls back to "id desc" ordering when the request names no sort field.
     * @param Request $request
     * @return Response
     */
    public function select(Request $request): Response
    {
        [$where, $format, $limit, $field, $order] = $this->selectInput($request);
        if (!$field) {
            $field = 'id';
            $order = 'desc';
        }
        $query = $this->doSelect($where, $field, $order);
        return $this->doFormat($query, $format, $limit);
    }

    /**
     * Update a dataset (POST) or render the update form (any other method).
     *
     * When the embedding model is changed, every stored vector of the dataset is
     * discarded (database column and Redis index) and training is restarted.
     *
     * @param Request $request
     * @return Response
     * @throws BusinessException when the dataset does not exist
     */
    public function update(Request $request): Response
    {
        if ($request->method() !== 'POST') {
            return view('ai-dataset/update');
        }
        $dataset = AiDataset::find($request->post('id'));
        if (!$dataset) {
            throw new BusinessException('数据集不存在');
        }
        // Only an explicitly submitted, different model invalidates the vectors; a partial
        // update that omits "model" must not wipe the dataset's embeddings.
        $newModel = $request->post('model');
        $needTrain = $newModel !== null && $newModel !== $dataset->model;
        $response = parent::update($request);
        if ($needTrain) {
            AiEmbedding::where('dataset_id', $dataset->id)->update(['text_embedding' => '']);
            $this->dropRedisIndex($dataset->id);
            $this->tryTrain($dataset->id);
        }
        return $response;
    }

    /**
     * Import texts into a dataset.
     *
     * GET renders the import form. Any other method accepts one or more uploaded files
     * under "file": .txt / .md files are stored directly, .zip archives are scanned for
     * .txt / .md entries. Texts already present in the dataset are skipped. Training is
     * started when at least one new text was stored.
     *
     * @param Request $request
     * @return Response
     */
    public function import(Request $request): Response
    {
        if ($request->method() === 'GET') {
            $datasetId = $request->get('dataset_id');
            return view('ai-dataset/import', ['dataset' => $datasetId]);
        }
        $datasetId = $request->post('dataset_id');

        // The upload may arrive as nothing, a single file object or an array of files.
        $files = $request->file('file');
        if (!$files) {
            return $this->json(1, '文件上传失败');
        }
        if (!is_array($files)) {
            $files = [$files];
        }
        foreach ($files as $file) {
            if (!$file || !$file->isValid()) {
                return $this->json(1, '文件上传失败');
            }
        }
        if (!AiDataset::find($datasetId)) {
            return $this->json(1, '数据集不存在');
        }

        $textExtensions = ['txt', 'md'];
        $needTrain = false;
        foreach ($files as $file) {
            $fileExt = strtolower($file->getUploadExtension());
            if (in_array($fileExt, $textExtensions, true)) {
                if ($this->storeText($datasetId, file_get_contents($file), $file->getUploadName())) {
                    $needTrain = true;
                }
                continue;
            }
            if ($fileExt !== 'zip') {
                continue;
            }
            $zip = new ZipArchive;
            // ZipArchive::open() returns true on success and a (truthy) integer error
            // code on failure, so the result has to be compared strictly.
            if ($zip->open((string)$file) !== true) {
                // Texts stored from earlier files of this request still need training.
                if ($needTrain) {
                    $this->tryTrain($datasetId);
                }
                return $this->json(1, '无法解析zip压缩文件');
            }
            for ($i = 0; $i < $zip->numFiles; $i++) {
                $entryName = $zip->getNameIndex($i);
                if ($entryName === false) {
                    continue;
                }
                $entryExt = strtolower(pathinfo($entryName, PATHINFO_EXTENSION));
                if (!in_array($entryExt, $textExtensions, true)) {
                    continue;
                }
                $stored = $this->storeText(
                    $datasetId,
                    $zip->getFromIndex($i),
                    pathinfo($entryName, PATHINFO_BASENAME)
                );
                if ($stored) {
                    $needTrain = true;
                }
            }
            $zip->close();
        }
        if ($needTrain) {
            $this->tryTrain($datasetId);
        }

        return $this->json(0, 'ok');
    }

    /**
     * Store one text as a new embedding row unless it is empty or already in the dataset.
     *
     * @param mixed $datasetId dataset the text belongs to
     * @param string|false $text file content; false / empty content is ignored
     * @param mixed $filename original file name, passed through convertToUtf8()
     * @return bool true when a new row was created
     */
    protected function storeText($datasetId, $text, $filename): bool
    {
        if (!$text) {
            return false;
        }
        if (AiEmbedding::where(['dataset_id' => $datasetId, 'text' => $text])->first()) {
            return false;
        }
        $embedding = new AiEmbedding();
        $embedding->text = $text;
        $embedding->dataset_id = $datasetId;
        $embedding->filename = static::convertToUtf8($filename);
        $embedding->save();
        return true;
    }

    /**
     * Start training, or stop it when the posted status is "stopped".
     * @param Request $request
     * @return Response
     */
    public function train(Request $request): Response
    {
        $datasetId = $request->post('dataset_id');
        if ($request->post('status') === 'stopped') {
            $dataset = AiDataset::find($datasetId);
            if (!$dataset) {
                return $this->json(1, '数据集不存在');
            }
            // doTrain() re-reads the status before every item and stops when it sees this.
            $dataset->status = 'stopped';
            $dataset->save();
            return $this->json(0, 'ok');
        }
        $this->tryTrain($datasetId);
        return $this->json(0, 'ok');
    }

    /**
     * Start training unless the dataset is missing or already marked as training.
     * @param $datasetId
     * @return void
     */
    protected function tryTrain($datasetId)
    {
        $dataset = AiDataset::find($datasetId);
        if (!$dataset || $dataset->status === 'training') {
            return;
        }
        $dataset->status = 'training';
        $dataset->save();
        $this->doTrain($datasetId, 0);
    }

    /**
     * Embed the next pending text of a dataset and schedule the one after it.
     *
     * Picks the first row with an empty embedding whose id is greater than $index,
     * requests its embedding and, from the completion callback, stores the vector in the
     * database and in Redis before moving on to the next row (after the dataset's
     * configured delay, if any). The dataset is marked "stopped" once no row is left.
     *
     * @param $datasetId
     * @param $index id of the last processed row; only rows with a greater id are considered
     * @return void
     */
    protected function doTrain($datasetId, $index)
    {
        $dataset = AiDataset::find($datasetId);
        if (!$dataset || $dataset->status === 'stopped') {
            return;
        }
        // Order by id so the "id > $index" cursor cannot skip rows.
        $item = AiEmbedding::where(function ($query) {
            $query->where('text_embedding', '')->orWhereNull('text_embedding');
        })->where('dataset_id', $datasetId)->where('id', '>', $index)->orderBy('id')->first();
        if (!$item) {
            $dataset->status = 'stopped';
            $dataset->save();
            return;
        }
        $itemId = $item->id;
        $delay = $dataset->delay;
        Embedding::create([
            'model' => $dataset->model,
            'input' => $item->text,
            'encoding_format' => 'float'
        ], [
            'complete' => function ($data) use ($itemId, $datasetId, $delay) {
                $embedding = AiEmbedding::find($itemId);
                if ($embedding) {
                    if (!isset($data['data'][0]['embedding'])) {
                        // No vector in the response: keep the raw response for diagnosis.
                        $embedding->log = json_encode($data, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
                        $embedding->save();
                    } else {
                        $textEmbedding = $data['data'][0]['embedding'];
                        $dimension = count($textEmbedding);
                        $redis = Redis::connection('plugin.ai.default');
                        // Kept separate from the row-id cursor so the next doTrain() call
                        // still receives the numeric id.
                        $redisIndex = "ai-embedding-$datasetId";
                        // Create the search index on first use; FT.INFO fails while it is missing.
                        try {
                            $indexExist = $redis->rawCommand('FT.INFO', $redisIndex);
                        } catch (Throwable $e) {
                            $indexExist = false;
                        }
                        if (!$indexExist) {
                            $redis->rawCommand('FT.CREATE', $redisIndex, 'on', 'JSON', 'PREFIX', '1', "$redisIndex:", 'SCHEMA',
                                '$.text_embedding', 'AS', 'text_embedding', 'VECTOR', 'FLAT', '6', 'DIM', $dimension, 'DISTANCE_METRIC', 'COSINE', 'TYPE', 'FLOAT32');
                        }
                        $embedding->text_embedding = json_encode($textEmbedding, JSON_UNESCAPED_UNICODE);
                        $embedding->log = '';
                        $embedding->save();
                        $key = "$redisIndex:$embedding->id";
                        $value = [
                            "id" => $embedding->id,
                            "text_embedding" => $textEmbedding
                        ];
                        $redis->rawCommand('JSON.SET', $key, '$', json_encode($value, JSON_UNESCAPED_UNICODE));
                    }
                }
                // Continue with the row after this one, optionally throttled by the dataset delay.
                if ($delay) {
                    Timer::add($delay, function () use ($datasetId, $itemId) {
                        $this->doTrain($datasetId, $itemId);
                    }, null, false);
                } else {
                    $this->doTrain($datasetId, $itemId);
                }
            }
        ]);
    }

    /**
     * Delete dataset(s) together with their embedding rows and Redis data.
     *
     * The posted "id" may be a single id or a list of ids.
     *
     * @param Request $request
     * @return Response
     * @throws BusinessException
     */
    public function delete(Request $request): Response
    {
        $result = parent::delete($request);
        $ids = array_filter((array)$request->post('id'), static function ($id) {
            return is_scalar($id) && $id !== '';
        });
        if ($ids) {
            AiEmbedding::whereIn('dataset_id', $ids)->delete();
            foreach ($ids as $id) {
                $this->dropRedisIndex($id);
            }
        }
        return $result;
    }

    /**
     * Best-effort removal of a dataset's Redis documents and search index.
     *
     * Failures are ignored on purpose: the index does not exist for datasets that were
     * never trained, and a Redis problem must not fail the surrounding update / delete.
     *
     * @param mixed $datasetId
     * @return void
     */
    protected function dropRedisIndex($datasetId): void
    {
        $redisIndex = "ai-embedding-$datasetId";
        try {
            $redis = Redis::connection('plugin.ai.default');
            $keysToDelete = $redis->keys("$redisIndex:*");
            // DEL needs at least one key.
            if ($keysToDelete) {
                $redis->del($keysToDelete);
            }
        } catch (Throwable $e) {
            // Ignored: cleanup is best effort.
        }
        try {
            Redis::connection('plugin.ai.default')->rawCommand('FT.DROPINDEX', $redisIndex);
        } catch (Throwable $e) {
            // Ignored: the index may simply not exist.
        }
    }

    /**
     * List all datasets as name / value pairs (value is the dataset id).
     * @return Response
     */
    public function list(): Response
    {
        $items = [];
        foreach (AiDataset::get() as $item) {
            $items[] = [
                'name' => $item['name'],
                'value' => $item['id']
            ];
        }
        return $this->json(0, 'ok', $items);
    }

    /**
     * Pass content through mb_convert_encoding() with UTF-8 as the target encoding.
     *
     * Empty content is returned unchanged, as is any content when the mbstring
     * extension is unavailable.
     * NOTE(review): no source encoding is given, so mbstring assumes its internal
     * encoding; names in other encodings (e.g. GBK zip entries) are presumably not
     * transcoded correctly - confirm whether detection is needed.
     *
     * @param $content
     * @return array|false|string|string[]|null
     */
    public static function convertToUtf8($content)
    {
        if (!$content || !function_exists('mb_convert_encoding')) {
            return $content;
        }
        return mb_convert_encoding($content, 'UTF-8');
    }

}
